Using images of 5 rice species locally grown in Turkey, provided by Ankara University (Department of Agriculture).
We will only work with the archived data throughout.
Images will be selected, then streamed from bash.
The output will be captured and processed using Python.
The MyRiceImgReader class has been developed for this purpose.
!ls -hs
total 220M 220M archive.zip 0 output
# Count the images available for every species and print the grand total
# followed by one line per species.
total = 0
infos = []
for spc in MyRiceImgReader.all_species:
    SpcStreamer = MyRiceImgReader(spc)
    nimages = len(SpcStreamer)
    total += nimages
    infos.append(f'{nimages} Images found for {spc}')
print(
    f'{total/1e3:.0f}k images found\n',
    '*' * 16 + '\n',
    '\n'.join(infos),
    sep='',
)
75k images found **************** 15000 Images found for Karacadag 15000 Images found for Jasmine 15000 Images found for Ipsala 15000 Images found for Arborio 15000 Images found for Basmati
# Show sample images for the five species.
# NOTE(review): axes=(2, 5) / size=(10, 5) presumably mean grid layout and
# figure size in inches -- confirm against display_species.
display_species(
    spcs=['Arborio', 'Basmati', 'Jasmine', 'Karacadag', 'Ipsala'],
    axes=(2, 5),
    size=(10, 5),
)
Randomly positioning seeds on the frame
# Three preview configurations: reader defaults, enlarged background with
# pile_size 10, and the same options with pile_size overridden to 1.
# `opt` is bound here and reused by later cells.
previews = [
    {},
    (opt := {'enlarge_backgrd': 10, 'initsize': True, 'pile_size': 10}),
    {**opt, 'pile_size': 1},
]
# Pull the first grayscale Arborio image produced under each configuration.
ims = [
    next(MyRiceImgReader('Arborio', gs=True, max_imgs=1).images_arrays(p))
    for p in previews
]

fig, axes = plt.subplots(nrows=1, ncols=3)
fig.set_size_inches(10, 5)

axes[0].imshow(ims[0])
axes[0].set_title('Gray(75k imgs)')

axes[1].imshow(ims[1])
axes[1].set_title('Gray_enlarged_pile(1000 images)')

axes[2].imshow(ims[2])
# Kept as the last expression so the notebook still echoes the Text object.
axes[2].set_title('Gray_enlarged(75k images)')
Arborio: 0%| | 0/1 [00:00<?, ?it/s]
Arborio: 0%| | 0/1 [00:00<?, ?it/s]
Arborio: 0%| | 0/1 [00:00<?, ?it/s]
Text(0.5, 1.0, 'Gray_enlarged(75k images)')
# Print the array shape and total element count of one RGB image.
RGBMatrix = list(MyRiceImgReader('Arborio', max_imgs=1).images_arrays({}))[0]
height, *s = RGBMatrix.shape
# Three-axis shape: multiply all three extents.
pixels = height * s[0] * s[1]
print(f'RGB Matrix shape : {RGBMatrix.shape}\nTotal Pixels : {pixels}')
Arborio: 0%| | 0/1 [00:00<?, ?it/s]
RGB Matrix shape : (250, 250, 3) Total Pixels : 187500
# Print the array shape and total pixel count of one grayscale image.
GrayMatrix = list(
    MyRiceImgReader('Arborio', max_imgs=1, gs=True).images_arrays({})
)[0]
# Grayscale arrays are two-dimensional, so the shape unpacks into two values.
height, s = GrayMatrix.shape
pixels = height * s
# The label now names the grayscale matrix (it previously said "RGB").
print(f'Gray Matrix shape : {GrayMatrix.shape}\nTotal Pixels : {pixels}')
Arborio: 0%| | 0/1 [00:00<?, ?it/s]
RGB Matrix shape : (250, 250) Total Pixels : 62500
# Create one FitData handle per preprocessing variant.
n_components, sample = 10, 10
GrayFitData = FitData('gray')
# Each FitData receives its own copy of the options: `opt` is mutated in
# place below, and sharing one dict would let that change leak into an
# object that was constructed earlier.
GrayEnlargedFitData = FitData('gray_enlarged', preview=dict(opt))
opt['pile_size'] += 9
GrayEnlargedPileFitData = FitData('gray_enlarged_pile', preview=dict(opt))
# NOTE(review): where `opt` is defined in the preview cell, pile_size is 10,
# so these two objects get pile_size 10 and 19. The previews used 1 and 10
# for the same two variants -- confirm which values are intended.
!ls output/2023-04-01/
fit_data inverters.zip raw_fit_data
# Echo the summary table for the 'gray' variant; the captured output has one
# column per species and one row per PC_k.
GrayFitData.summarise()
| | Karacadag | Jasmine | Ipsala | Arborio | Basmati |
|---|---|---|---|---|---|
| PC_8 | 0.997346 | 0.993554 | 0.993312 | 0.996259 | 0.986906 |
| PC_9 | 0.997635 | 0.994223 | 0.994192 | 0.996667 | 0.988313 |
# Print the directory tree of the raw fit data.
seedir.seedir(f'{DATA_DIR}/raw_fit_data', style='emoji')
📁 raw_fit_data/
├─📁 gray/
│ ├─📁 Arborio/
│ │ ├─📄 cols_0_2000.h5
│ │ └─📄 cols_2000_2500.h5
│ ├─📁 Basmati/
│ │ ├─📄 cols_0_2000.h5
│ │ └─📄 cols_2000_2500.h5
│ ├─📁 Ipsala/
│ │ ├─📄 cols_0_2000.h5
│ │ └─📄 cols_2000_2500.h5
│ ├─📁 Jasmine/
│ │ ├─📄 cols_0_2000.h5
│ │ └─📄 cols_2000_2500.h5
│ └─📁 Karacadag/
│ ├─📄 cols_0_2000.h5
│ └─📄 cols_2000_2500.h5
├─📁 gray_enlarged/
│ ├─📁 Arborio/
│ │ ├─📄 cols_0_2000.h5
│ │ └─📄 cols_2000_2500.h5
│ ├─📁 Basmati/
│ │ ├─📄 cols_0_2000.h5
│ │ └─📄 cols_2000_2500.h5
│ ├─📁 Ipsala/
│ │ ├─📄 cols_0_2000.h5
│ │ └─📄 cols_2000_2500.h5
│ ├─📁 Jasmine/
│ │ ├─📄 cols_0_2000.h5
│ │ └─📄 cols_2000_2500.h5
│ └─📁 Karacadag/
│ ├─📄 cols_0_2000.h5
│ └─📄 cols_2000_2500.h5
└─📁 gray_enlarged_pile/
├─📁 Arborio/
│ ├─📄 cols_0_2000.h5
│ └─📄 cols_2000_2500.h5
├─📁 Basmati/
│ ├─📄 cols_0_2000.h5
│ └─📄 cols_2000_2500.h5
├─📁 Ipsala/
│ ├─📄 cols_0_2000.h5
│ └─📄 cols_2000_2500.h5
├─📁 Jasmine/
│ ├─📄 cols_0_2000.h5
│ └─📄 cols_2000_2500.h5
└─📁 Karacadag/
├─📄 cols_0_2000.h5
└─📄 cols_2000_2500.h5
# Call save_fit_data with the same test rate on every variant, then print
# the resulting directory tree.
test_rate = 0.25

GrayFitData.save_fit_data(test_rate)
GrayEnlargedFitData.save_fit_data(test_rate)
GrayEnlargedPileFitData.save_fit_data(test_rate)

seedir.seedir(f'{DATA_DIR}/fit_data', style='emoji')
📁 fit_data/ ├─📁 gray/ │ ├─📄 X.npy │ ├─📄 Xix.npy │ ├─📄 X_test.npy │ ├─📄 X_testix.npy │ ├─📄 y.npy │ └─📄 y_test.npy ├─📁 gray_enlarged/ │ ├─📄 X.npy │ ├─📄 Xix.npy │ ├─📄 X_test.npy │ ├─📄 X_testix.npy │ ├─📄 y.npy │ └─📄 y_test.npy └─📁 gray_enlarged_pile/ ├─📄 X.npy ├─📄 Xix.npy ├─📄 X_test.npy ├─📄 X_testix.npy ├─📄 y.npy └─📄 y_test.npy
# Global minimum and maximum of X; the tuple is echoed by the notebook.
m = X.min()
M = X.max()
(m, M)
(-1474.5641, 2537.4983)
# Min-max scale X in place using the extrema m and M: shift by m, then divide
# by the range. The last line echoes the new extrema as a check.
X-=m
X/=(M-m)
X.min(),X.max()
(0.0, 1.0)
# Build the project's MyPCA object from X (presumably the decomposition is
# fitted in the constructor -- TODO confirm); the bare name echoes it.
pca = MyPCA(X)
pca
<Rice.MyPCA at 0x7f774c101f70>
# Serialize the MyPCA object with joblib to the path built by prodpath.
joblib.dump(pca,prodpath('fit_pca','projections',ext='.joblib'))
['/mnt/d_drive/Rice/rice_data/output/2023-04-01/prod/projections/fit_pca.joblib']
# Largest of the first k entries of pca.c_xplain_var_ratio, rounded to four
# decimals (presumably the cumulative explained-variance ratio reached with
# k components -- confirm against MyPCA).
k = 100
pca.c_xplain_var_ratio.round(4)[:k].max()
0.9994
# Reload the saved MyPCA object from disk and replace X with the result of
# proj(X, k); X.shape echoes the new shape.
X = joblib.load(prodpath('fit_pca','projections',ext='.joblib')).proj(X,k)
X.shape
(116250, 100)
# Distinct label values present in y.
set(y)
{0, 1, 2, 3, 4}
# Split the data 50/50 into a training half and a validation half.
# A fixed seed makes the split -- and every metric computed from it --
# reproducible across reruns, in line with the seeded RandomForestClassifier
# used further down.
X, X_val, y, y_val = train_test_split(X, y, test_size=0.5, random_state=42)
X.shape, X_val.shape, y.shape, y_val.shape
((58125, 100), (58125, 100), (58125,), (58125,))
# Convert both label vectors with to_dummies(., 5) (presumably a one-hot
# encoding over the 5 classes -- the echoed shapes have 5 columns).
y = to_dummies(y,5)
y_val = to_dummies(y_val,5)
y.shape,y_val.shape
((58125, 5), (58125, 5))
# Print the directory tree of the exported projection files.
seedir.seedir(f'{DATA_DIR}/prod/projections/', style='emoji')
📁 projections/ ├─📄 fit_pca.joblib ├─📄 meta.tsv └─📄 vectors.tsv
# Interactive 3-D scatter of columns PC0/PC1/PC2 of df(), coloured by the
# 'label2' column.
fig = px.scatter_3d(
    df(),
    x='PC0',
    y='PC1',
    z='PC2',
    color='label2',
    width=1000,
    height=800,
)
# Shrink the markers.
fig.update_traces(marker={'size': 3})
fig.show()
/tmp/ipykernel_1676/3126959195.py:4: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. df =lambda k=3: pandas.DataFrame(
%%HTML
<iframe src="https://projector.tensorflow.org/?config=https://gist.githubusercontent.com/AbdourahmaneTintou/11cc1b18aa20cce3174f858e5c3bca3b/raw/cf6cb33912fd1dd62fddf42945fbdd67f34d0acd/config.json" style="width: 1500px; height: 900px;"></iframe>
# Stacked KDE of column PC0 from df(1), coloured by the 'Type' column.
fig = seaborn.displot(
    data=df(1).reset_index(drop=True),
    x="PC0",
    hue="Type",
    kind="kde",
    multiple="stack",
)
fig._figure.set_size_inches(20, 10)
/tmp/ipykernel_1643/3126959195.py:4: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
# Logistic-regression baseline. The indicator-encoded targets are collapsed
# back to integer class ids with argmax before fitting.
Logistic = LogisticRegression(max_iter=10_000).fit(
    X,
    y.values.argmax(axis=1),
)
logistic_preds = Logistic.predict(X_val)
Logistic
LogisticRegression(max_iter=10000)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
LogisticRegression(max_iter=10000)
# Class index -> species name; `names` is reused by later cells.
names = dict(zip(range(5), MyRiceImgReader.all_species))

# Cross-tabulate true classes (rows) against predicted classes (columns) and
# relabel both axes with species names.
conf_mat = pandas.crosstab(
    y_val.values.argmax(axis=1),
    logistic_preds,
    rownames=['true'],
    colnames=['preds'],
).rename(index=names, columns=names)

# Each column is divided by its own sum before plotting; the title reports
# overall accuracy as the diagonal total over the number of validation rows.
px.imshow(
    (conf_mat / conf_mat.sum()).round(2),
    text_auto=True,
    title=f'Confusion Matrix Logistic Regression (Accuracy {conf_mat.values.trace()*100/len(y_val):.2f}% )',
)
# Random-forest classifier fitted directly on the indicator-encoded targets;
# predictions are collapsed to class ids with argmax.
# NOTE(review): fitting on the 5-column y makes this a multi-output forest,
# unlike the logistic model, which is fitted on integer class ids. A row
# predicted as all zeros would fall to class 0 under argmax -- confirm this
# is intended.
Forest = RandomForestClassifier(
    random_state=42,
    min_samples_split=5,
    max_depth=None,
).fit(X, y)
forest_preds = Forest.predict(X_val).argmax(axis=1)
Forest
RandomForestClassifier(min_samples_split=5, random_state=42)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
RandomForestClassifier(min_samples_split=5, random_state=42)
# Cross-tabulate true classes (rows) against forest predictions (columns),
# relabelling both axes with the species names held in `names`.
conf_mat = pandas.crosstab(
    y_val.values.argmax(axis=1),
    forest_preds,
    rownames=['true'],
    colnames=['preds'],
).rename(index=names, columns=names)

# Each column is divided by its own sum before plotting; the title reports
# overall accuracy as the diagonal total over the number of validation rows.
px.imshow(
    (conf_mat / conf_mat.sum()).round(2),
    text_auto=True,
    title=f'Confusion Matrix Random Forest (Accuracy {conf_mat.values.trace()*100/len(y_val):.2f}% ) ',
)
# Serialize the random-forest model with joblib to the path built by prodpath.
joblib.dump(Forest, prodpath('Forest','fitted_models',ext='.joblib'))
['/mnt/d_drive/Rice/rice_data/output/2023-04-01/prod/fitted_models/Forest.joblib']
# Serialize the logistic-regression model with joblib to the path built by prodpath.
joblib.dump(Logistic,prodpath('Logistic','fitted_models',ext='.joblib'))
['/mnt/d_drive/Rice/rice_data/output/2023-04-01/prod/fitted_models/Logistic.joblib']
# Functional-API multilayer perceptron:
# Input -> Dense(100, relu) -> Dropout(0.1) -> Dense(50, relu) -> Dense(5, softmax).
_, *s = X.shape  # drop the leading axis of X; the rest is the input shape

Input = keras.Input(shape=s, name='Input')
Dense0 = keras.layers.Dense(
    100,
    activation='relu',
    name='Hidden0',
)(Input)
Drop0 = keras.layers.Dropout(
    rate=0.1,
    name='Dropout0',
)(Dense0)
Dense1 = keras.layers.Dense(
    50,
    activation='relu',
    name='Hidden1',
)(Drop0)
Output = keras.layers.Dense(
    5,
    activation='softmax',
    name='Output',
)(Dense1)

MLP = keras.Model(Input, Output, name='MLPerceptron')
MLP.summary()
Examples are limited to ~6000 due to GPU limits.
# NOTE(review): this run directory (2023-03-28) differs from the 2023-04-01
# directory listed earlier in the notebook -- confirm it is the intended one.
rootpath = 'output/2023-03-28'
n_components = 10


def _gray_stream(variant):
    """Build the BatchStream for one grayscale dataset variant.

    ``variant`` is the name passed to ``arrpath`` for the X and y arrays and
    also the sub-directory of ``inverters`` used as ``parent`` (for example
    ``'gray'``). All other settings are shared by every variant.
    """
    return BatchStream(
        arrpath(variant, 'X'),
        arrpath(variant, 'y'),
        inverters_zip=f'{rootpath}/inverters.zip',
        parent=f'{rootpath}/inverters/{variant}',
        max_dim=n_components,
        imshape=(250, 250, 1),
        n_chunks=100,
        as_im=True,
        gs=True,
    )


GrayStreamer = _gray_stream('gray')
GrayEnlargedStreamer = _gray_stream('gray_enlarged')
# This stream previously used the 'gray_enlarged' inverters directory while
# reading the 'gray_enlarged_pile' arrays; it now uses its own directory.
GrayEnlargedPileStreamer = _gray_stream('gray_enlarged_pile')